library(phyloseq)
library(multcompView)
library(seqinr)
## Warning: package 'seqinr' was built under R version 3.3.2
library(phangorn) 
## Warning: package 'phangorn' was built under R version 3.3.2
## Loading required package: ape
## Warning: package 'ape' was built under R version 3.3.2
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
## 
##     as.alignment, consensus
library(caroline)

options(stringsAsFactors = FALSE)

taxcolors <- matrix(c("#8dd3c7", "#bebada", "#fb8072"), 
                    dimnames=list(c("CladeA", "CladeC", "CladeD")))

load("Data/Moorea_sym100_f.RData")  # loads phyloseq object named phy.f
phy100.f <- phy.f # renames to phy100.f
load("Data/Moorea_sym_f.RData")  # loads phyloseq object named phy.f
load("tempdata.RData") # loads the temperature data from temperature_analysis


# Convert to proportion (relative abundance)
phy.f.p <- transform_sample_counts(phy.f, function(x) x/sum(x))
phy100.f.p <- transform_sample_counts(phy100.f, function(x) x/sum(x))
# Apply transformation function
transform <- function(x) sqrt(x/sum(x))  # Set transformation function
phy.f.t <- transform_sample_counts(phy.f, transform)  # Transform data
phy100.f.t <- transform_sample_counts(phy100.f, transform)  # Transform data

write.table(data.frame(tax_table(phy.f)), "Data/tax.table.txt", row.names=T, quote=F)

Unifrac Distance

#https://rdrr.io/rforge/seqinr/man/dist.alignment.html
#returns sqrt of pairwise genetic distance, then squared the matrices
A.seqs <- read.alignment(file = "Data/A_tree_seqs_aligned_clean.fasta", format= "fasta")
A.dis <- (as.matrix(dist.alignment(A.seqs, matrix = "identity" )))^2
write.csv(A.dis, file="Data/A.dis.matx.csv")

C.seqs <- read.alignment(file = "Data/C_tree_seqs_aligned_clean.fasta", format= "fasta")
C.dis <- (as.matrix(dist.alignment(C.seqs, matrix = "identity" )))^2
write.csv(C.dis, file="Data/C.dis.matx.csv")

D.seqs <- read.alignment(file = "Data/D_tree_seqs_aligned_clean.fasta", format= "fasta")
D.dis <- (as.matrix(dist.alignment(D.seqs, matrix = "identity" )))^2
write.csv(D.dis, file="D.dis.matx.csv")

A_C <- matrix(0.1960, ncol=ncol(A.dis), nrow=nrow(C.dis), dimnames=list(rownames(C.dis), colnames(A.dis)))
A_D <- matrix(0.1775, ncol=ncol(A.dis), nrow=nrow(D.dis), dimnames=list(rownames(D.dis), colnames(A.dis)))
C_D <- matrix(0.1520, ncol=ncol(C.dis), nrow=nrow(D.dis), dimnames=list(rownames(D.dis), colnames(C.dis)))
  
#build ACD matrix
col1 <- rbind(A.dis, A_C, A_D)
col2 <- rbind(matrix(NA, nrow=nrow(A.dis), ncol=ncol(C.dis), dimnames=list(rownames(A.dis), colnames(C.dis))), C.dis, C_D)
col3 <- rbind(matrix(NA, nrow=nrow(A.dis)+nrow(C.dis), ncol=ncol(D.dis), dimnames=list(c(rownames(A.dis), rownames(C.dis)), colnames(D.dis))), D.dis)

ubermatrix <- cbind(col1, col2, col3)
dim(ubermatrix)
## [1] 245 245
#build tree
uber.tree <- phangorn::upgma(ubermatrix)
plot(uber.tree, main="UPGMA")

# Slot uber tree into the phy_tree slot of the phyloseq object
phy_tree(phy.f) <- phy_tree(uber.tree)
phy_tree(phy.f.p) <- phy_tree(uber.tree)

# Calculate weighted Unifrac distance
UF.dis <- UniFrac(phy.f, weighted=TRUE, normalized=F, parallel=FALSE, fast=TRUE)

#NMDS on 97%-within-sample OTUs
ord <- ordinate(phy.f, "NMDS", UF.dis, trymax=100, trace=F)
## Run 0 stress 0.02441644 
## Run 1 stress 0.02442356 
## ... Procrustes: rmse 0.0004574289  max resid 0.002075781 
## ... Similar to previous best
## Run 2 stress 0.04724342 
## Run 3 stress 0.0273301 
## Run 4 stress 0.06507061 
## Run 5 stress 0.1998683 
## Run 6 stress 0.02422882 
## ... New best solution
## ... Procrustes: rmse 0.007788374  max resid 0.04981734 
## Run 7 stress 0.04728297 
## Run 8 stress 0.06484664 
## Run 9 stress 0.0273006 
## Run 10 stress 0.02438606 
## ... Procrustes: rmse 0.007873963  max resid 0.04950934 
## Run 11 stress 0.02732767 
## Run 12 stress 0.0243915 
## ... Procrustes: rmse 0.007874658  max resid 0.04948466 
## Run 13 stress 0.02441619 
## ... Procrustes: rmse 0.00781145  max resid 0.04989669 
## Run 14 stress 0.04728086 
## Run 15 stress 0.02422396 
## ... New best solution
## ... Procrustes: rmse 0.0004517597  max resid 0.001910792 
## ... Similar to previous best
## Run 16 stress 0.06435801 
## Run 17 stress 0.06412837 
## Run 18 stress 0.06497383 
## Run 19 stress 0.02718338 
## Run 20 stress 0.04727928 
## *** Solution reached
ord2 <- ordinate(phy.f, "PCoA", "wunifrac")
# Plot the coral samples
plot_ordination(phy.f, ord, color="Species", shape="Site",
                title="97% within-sample OTUs")

# Plot the Symbiodinium taxa
plot_ordination(phy.f, ord2, color="Clade", type="species",
                title="97% within-sample OTUs")

Clade overview

The relative abundance of each clade in the entire dataset (normalized for differences in sequencing depth across samples).

cladeAbund <- aggregate(data.frame(RelAbund=rowSums(otu_table(phy.f.p))),
                        by=list(Clade=data.frame(tax_table(phy.f.p))$Clade), FUN=sum)
cladeAbund$Prop <- prop.table(cladeAbund$RelAbund)

bars <- barplot(cladeAbund$Prop*100, col=taxcolors, space=0,
                names.arg=cladeAbund$Clade, xlab=expression(paste(italic('Symbiodinium'), " Clade")),
                ylab="Relative abundance (%)")
text(bars, cladeAbund$Prop*100+2, labels=round(cladeAbund$Prop*100, 1), xpd=T)

cladeAbund$Notus <- table(data.frame(tax_table(phy.f.p))$Clade)
cladeAbund$Notus
## 
##   A   C   D 
##  32 195  18

Clade compostion of each sample

In the first chart, bars are colored according to Symbiodinium clade. In the second chart, bars are colored according to OTU.

composition <- function(phy, col, legend=T) {
  samdat <- data.frame(sample_data(phy))
  #samdat$Genus <- factor(samdat$Genus, levels=rev(levels(samdat$Genus)))
  samdat$Species <- factor(samdat$Species, levels=rev(levels(samdat$Species)))
  samdat$Site <- factor(samdat$Site, levels=rev(levels(samdat$Site)))
  samdat <- samdat[with(samdat, order(Species, Site, -DNA.ID)), ]
  typerelabund <- as.matrix(otu_table(phy)[order(data.frame(tax_table(phy))$hit), 
                                           rownames(samdat)])
  sitebreaks <- c(as.character(samdat$Site), "X")==c("X", as.character(samdat$Site))
  sitebreaks <- which(sitebreaks==F) - 1
  spbreaks <- c(which(duplicated(samdat$Species)==F) - 1, nrow(samdat))
  
  # Make Barplot
  barplot(typerelabund, horiz=T, space=0, axes=F,axisnames=F, yaxs="i", col=col)
  rect(0, 0, par("usr")[2], par("usr")[4], lwd=1, xpd=T)
  axis(side=1, at=seq(0, 1, 0.1), line=0, tck=-0.025, mgp=c(0,0.25,0), cex.axis=0.7)
  mtext(side=1, "Relative abundance", cex=0.7, line=1)
  # Add legend
  if (legend==T) {
    legend(x=par("usr")[2]/2, y=par("usr")[4], xjust=0.5, yjust=0.25, horiz=T, bty="n", xpd=T, 
           cex=0.7, legend=c("A", "C", "D"), fill=taxcolors, x.intersp=0.5)
    legend(x=par("usr")[2]*1.1, y=par("usr")[4]*0.75, xjust=0, yjust=0.1, bty="n", xpd=T, cex=1, 
           pt.cex=1, legend=c("Site 1", "Site 2", "Site 3", "Site 7", "Site 8", "Site 9"), fill=c("purple", "blue", "green", "yellow", "orange", "red"), y.intersp=0.7, 
           x.intersp=0.3)
  }
  # Add grouping bars for Site
  sitecolors <- matrix(c("red", "orange", "yellow", "green", "blue", "purple"), 
              dimnames=list(c("Site 1", "Site 2", "Site 3", "Site 7", "Site 8", "Site 9"))) 
  for (i in 1:length(sitebreaks)) {
    lines(c(0, 1), c(sitebreaks[i], sitebreaks[i]), lty=2, lwd=0.25)
    rect(1.01, sitebreaks[i], 1.04, sitebreaks[i+1], col=sitecolors[samdat$Site[sitebreaks[i]+1],], 
         lwd=0.25, xpd=T)
  }
  
  # Add lines to separate species and species names
  for (i in 1:length(spbreaks)) {
    lines(c(0, 1.07), c(spbreaks[i], spbreaks[i]), xpd=T, type="l", lwd=0.4)
    text(1.03, (spbreaks[i] + spbreaks[i+1]) / 2, xpd=T, pos=4, cex=0.8,
         labels=paste(samdat$Genus[which(duplicated(samdat$Species)==F)][i], "\n",
                      samdat$Species[which(duplicated(samdat$Species)==F)][i], sep=""))
  }
  for (i in 1:nrow(samdat)) {
    text(0, i-0.5, rownames(samdat)[i], xpd=T, cex=0.7, pos=2)
  }
}

par(mfrow=c(1,1), mar=c(2, 1.5, 2, 10), lwd=0.1, cex=0.7, xpd=NA)
# Plot composition of 97% within-sample OTUs colored by clade
composition(phy.f.p, col=taxcolors[factor(data.frame(tax_table(phy.f.p))[order(data.frame(tax_table(phy.f.p))$Subtype), ]$Clade, levels=c("A","C","D"))], legend=T)

composition(phy.f.p, col=rainbow(ntaxa(phy.f.p)), legend=T)

#plot_richness(phy, x="Site", color = "Species")

Analyze temperature distributions for each site

# Plot temperature data
plot(temp.all$Date.Time, temp.all$Site.1, pch=15, col="darkred", cex=0.1, xlab="Time", ylab="Temperature °C",ylim=c(24,32))
points(temp.all$Date.Time, temp.all$Site.2, pch=15, col="red", cex=0.1)
points(temp.all$Date.Time, temp.all$Site.3, pch=15, col="pink", cex=0.1)
points(temp.all$Date.Time, temp.all$Site.7, pch=15, col="darkblue", cex=0.1)
points(temp.all$Date.Time, temp.all$Site.8, pch=15, col="blue", cex=0.1)
points(temp.all$Date.Time, temp.all$Site.9, pch=15, col="lightblue", cex=0.1)

#points(temp.all$Date.Time, temp.all$MCR1, pch=15, col="gray", cex=0.1)
#points(temp.all$Date.Time, temp.all$MCR6, pch=15, col="black", cex=0.1)

# Plot temperature distributions for each site
par(mfrow=c(2,3), mar=c(3,3,2,2), mgp=c(1.5,0.1,0), tcl=-0.2)
hist(temp.all$Site.1, xlim=c(25,32), breaks=seq(25,32,0.25), main="Site 1", xlab="Temp ?C")
hist(temp.all$Site.2, xlim=c(25,32), breaks=seq(25,32,0.25), main="Site 2", xlab="Temp ?C")
hist(temp.all$Site.3, xlim=c(25,32), breaks=seq(25,32,0.25), main="Site 3", xlab="Temp ?C")
hist(temp.all$Site.7, xlim=c(25,32), breaks=seq(25,32,0.25), main="Site 7", xlab="Temp ?C")
hist(temp.all$Site.8, xlim=c(25,32), breaks=seq(25,32,0.25), main="Site 8", xlab="Temp ?C")
hist(temp.all$Site.9, xlim=c(25,32), breaks=seq(25,32,0.25), main="Site 9", xlab="Temp ?C")

#hist(temp.all$MCR1, xlim=c(26.5,31.5), breaks=20, main="MCR1", xlab="Temp ?C")
#hist(temp.all$MCR6, xlim=c(26.5,31.5), breaks=20, main="MCR6", xlab="Temp ?C")

# View Descriptive Stats
temp.summ
##            mean       var    min    max        skew         dip
## Site.1 28.39648 0.8103580 26.158 30.722 -0.12671287 0.015065362
## Site.2 28.17516 0.8312129 25.960 30.170 -0.23322440 0.025254305
## Site.3 28.26482 0.8876240 25.914 30.419 -0.23305308 0.022069496
## Site.7 28.25800 1.1869338 25.331 31.484 -0.06973277 0.009957781
## Site.8 28.20707 0.9927550 25.671 30.900 -0.11178187 0.009786352
## Site.9 28.25297 1.0513679 25.574 31.510 -0.07039514 0.010267284
##        daily.mean.min daily.mean.max daily.mean.range daysover30
## Site.1       27.91200       28.87190        0.9598974         50
## Site.2       27.82315       28.38038        0.5572315          3
## Site.3       27.81051       28.71863        0.9081241         21
## Site.7       27.53253       29.25548        1.7229547        108
## Site.8       27.69738       28.71489        1.0175155         53
## Site.9       27.64471       28.97983        1.3351193         64
##        daysover31 daysunder27 daysunder27.5 daysunder28
## Site.1          0           2             9          49
## Site.2          0          10            60         175
## Site.3          0           1             8          81
## Site.7         11           1            17          49
## Site.8          0          11            44         114
## Site.9          7           3            16          70
# Plot rank order of various metrics
with(temp.summ, {
  par(mar=c(3,3,3,11), mfrow=c(1,1),xpd=TRUE)
  plot(NA, xaxt="n", xlim=c(1,6), ylim=c(1,6), xlab="", ylab="Rank Order")
  axis(side=1, at=1:6, labels = rownames(temp.summ))
  lines(rank(mean), type="b", pch=15, col="gray")
  lines(rank(var), type="b", pch=5, col="green")
  lines(rank(skew), type="b", pch=2, col="blue")
  # lines(rank(kurt), type="b", pch=4, col="purple")
  # lines(rank(hflat), type="b", pch=6, col="black")
  lines(rank(dip), type="b", pch=7, col="pink")
  lines(rank(daily.mean.min),type="b",pch=4, col="purple")
  lines(rank(daily.mean.max), type="b", pch=6, col="black")
  lines(rank(daily.mean.range), type="b",pch=5,col="orange")
  lines(rank(daysover30),type="b",pch=5,col="red")
  lines(rank(daysunder27.5),type="b",pch=5,col="darkblue")
  legend(6.5,7, legend=c("mean", "variance", "skewness", "bimodality", "mean daily min", "mean daily max", "mean daily range", "days over 30","days under 27.5"),
         lty=1, col=c("gray", "green", "blue", "pink", "purple", "orange","red","darkblue"),
         pch=c(15,5,2,7,4,6,5,5,5), inset=c(0,-0.4), xpd=NA)
})

## Importance of components:
##                           PC1    PC2     PC3    PC4     PC5       PC6
## Standard deviation     2.2887 1.4279 0.74442 0.3710 0.17708 1.148e-14
## Proportion of Variance 0.6548 0.2549 0.06927 0.0172 0.00392 0.000e+00
## Cumulative Proportion  0.6548 0.9096 0.97888 0.9961 1.00000 1.000e+00

##                       PC1         PC2         PC3         PC4         PC5
## mean           0.03493912 0.321749862 0.072732874 0.147057739 0.009746932
## var            0.14445058 0.137028844 0.115861169 0.002505728 0.035124972
## skew           0.15048315 0.003287871 0.192190096 0.039196923 0.360707768
## dip            0.14716593 0.008590837 0.200179872 0.219444382 0.264212980
## daily.mean.min 0.13145586 0.183869073 0.116761297 0.014129753 0.078233860
## daily.mean.max 0.15458394 0.081798025 0.093065973 0.116634277 0.043898005
## daysover30     0.15966493 0.005789603 0.007226683 0.220066620 0.182036429
## daysunder27.5  0.07725649 0.257885885 0.201982036 0.240964577 0.026039054
##                       PC6
## mean           0.24586921
## var            0.03678062
## skew           0.07167463
## dip            0.03867851
## daily.mean.min 0.32253999
## daily.mean.max 0.12966112
## daysover30     0.10412841
## daysunder27.5  0.05066752

Temperature regime spider plot

# Library
library(fmsb)
## Warning: package 'fmsb' was built under R version 3.3.2
temp.summ2 <- rbind(apply(temp.summ, 2, range), temp.summ)

sitecols <- c("#762a83", "#9970ab", "#c2a5cf", "#a6dba0", "#5aae61", "#1b7837")

radarchart(
  temp.summ2[,c("var", "daysunder27.5",
                "dip", "daily.mean.min", "mean", "daily.mean.max", "skew", "daysover30")],
  #custom polygon
  plwd=4, plty=1,
  
  #custom colors
  pcol=sitecols,
  pfcol=scales::alpha(sitecols, 0.25),
 
  #custom the grid
  cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,20,5), cglwd=0.8,
 
  #custom labels
  vlcex=0.8 
)

legend("topright", rownames(temp.summ2)[-(1:2)])

Multivariate analyses

Evenness

# Calculate Shannon Index and Pielou's Evenness for each sample
H <- diversity(t(otu_table(phy.f.p)))  # Shannon Index
J <- H/log(specnumber(t(otu_table(phy.f.p))))  # Pielou's Evenness
adiv <- cbind(data.frame(H), data.frame(J), sample_data(phy.f))
# Test for differences in Shannon index across species and sites
shannon.aov <- aov(adiv$H ~ adiv$Species * adiv$Site)
summary(shannon.aov)  # Significant Species effect
##                        Df Sum Sq Mean Sq F value   Pr(>F)    
## adiv$Species            1  4.298   4.298  44.619 3.75e-08 ***
## adiv$Site               5  0.398   0.080   0.826    0.538    
## adiv$Species:adiv$Site  5  0.236   0.047   0.490    0.782    
## Residuals              43  4.142   0.096                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test for differences in Pielou's evenness across species and sites
pielou.aov <- aov(adiv$J ~ adiv$Species * adiv$Site)
summary(pielou.aov)  # Pielou: significant Species effect
##                        Df Sum Sq Mean Sq F value   Pr(>F)    
## adiv$Species            1 0.6667  0.6667  48.059 1.82e-08 ***
## adiv$Site               5 0.0431  0.0086   0.621    0.685    
## adiv$Species:adiv$Site  5 0.0280  0.0056   0.403    0.844    
## Residuals              42 0.5826  0.0139                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Plot diversity and evenness for Acropora
par(mfrow=c(2,1))
with(subset(adiv, Species=="Acropora"), {
  boxplot(H ~ Site, ylim=c(0, 2), ylab="Pielou's Evenness",
        outline=F, border="black",
        main="Acropora Symbiodinium Diversity (Shannon)")
  boxplot(J ~ Site, ylim=c(0, 1), ylab="Pielou's Evenness",
        outline=F, border="black",
        main="Acropora Symbiodinium Evenness")
})

# Plot diversity and evenness for Porites
with(subset(adiv, Species=="Porties"), {
  boxplot(H ~ Site, ylim=c(0, 0.5), ylab="Shannon Index",
        outline=F, border="black",
        main="Porites Symbiodinium Diversity")
  boxplot(J ~ Site, ylim=c(0, 0.5), ylab="Pielou's Evenness",
        outline=F, border="black",
        main="Porites Symbiodinium Evenness")
})

Do diversity and evenness correlate with temperature regimes across sites?

porites.diversity <- with(subset(adiv, Species=="Porties"), aggregate(H, list(Site), FUN=mean, na.rm=T))
porites.evenness <- with(subset(adiv, Species=="Porties"), aggregate(J, list(Site), FUN=mean, na.rm=T))
acropora.diversity <- with(subset(adiv, Species=="Acropora"), aggregate(H, list(Site), FUN=mean, na.rm=T))
acropora.evenness <- with(subset(adiv, Species=="Acropora"), aggregate(J, list(Site), FUN=mean, na.rm=T))

par(mfrow=c(2,1))
plot(pca.temp$x[,1], porites.diversity$x, ylim=c(0,1), main="Diversity vs. Temp. PC1")
points(pca.temp$x[,1], acropora.diversity$x, col="red")
plot(pca.temp$x[,1], porites.evenness$x, ylim=c(0,0.6), main="Evenness vs. Temp. PC1")
points(pca.temp$x[,1], acropora.evenness$x, col="red")

#legend("topleft", legend=c("Acropora", "Porites"), col=c("red", "black"), pch=21)

par(mfrow=c(2,1))
plot(pca.temp$x[,2], porites.diversity$x, ylim=c(0,1), main="Diversity vs. Temp. PC2")
points(pca.temp$x[,2], acropora.diversity$x, col="red")
plot(pca.temp$x[,2], porites.evenness$x, ylim=c(0,0.6), main="Evenness vs. Temp. PC2")
points(pca.temp$x[,2], acropora.evenness$x, col="red")

#legend("topleft", legend=c("Acropora", "Porites"), col=c("red", "black"), pch=21)

par(mfrow=c(2,1))
plot(pca.temp$x[,3], porites.diversity$x, ylim=c(0,1), main="Diversity vs. Temp. PC3")
points(pca.temp$x[,3], acropora.diversity$x, col="red")
plot(pca.temp$x[,3], porites.evenness$x, ylim=c(0,0.6), main="Evenness vs. Temp. PC3")
points(pca.temp$x[,3], acropora.evenness$x, col="red")

#legend("topleft", legend=c("Acropora", "Porites"), col=c("red", "black"), pch=21)
Evenness does not appear to differ among sites for either species and shows no relationship with temperature regimes

Does number of clades correspond to temperature regime?

# Get and count clades present in each sample
clades <- apply(data.frame(otu_table(phy.f), check.names=F), 2, function(x) {
  unique(data.frame(tax_table(phy.f))[names(which(x!=0)), "Clade"])
})

sample_data(phy.f) <- within(sample_data(phy.f), {
  nclades <- sapply(clades, length)
})

# Plot number of clades across sites
with(subset(sample_data(phy.f), Species=="Porties"), plot(nclades ~ Site, main = "Number of clades in Porites"))

with(subset(sample_data(phy.f), Species=="Acropora"), plot(nclades ~ Site, main="Number of clades in Acropora"))

Does abundance of clade D correspond to temperature regime?

# Test if abundance of clade D correlates with temp regime
cladeAbund <- apply(otu_table(phy.f.p), 2, function(x) aggregate(x, by=list(Clade=data.frame(tax_table(phy.f.p))$Clade), FUN=sum)$x)
rownames(cladeAbund) <- c("A", "C", "D")

sample_data(phy.f.p) <- within(sample_data(phy.f.p), {
  A <- t(cladeAbund)[, "A"]
  C <- t(cladeAbund)[, "C"]
  D <- t(cladeAbund)[, "D"]
  PC1 <- sample_data(phy.f)$PCA1
  PC2 <- sample_data(phy.f)$PCA2
  PC3 <- sample_data(phy.f)$PCA3
})

with(subset(sample_data(phy.f.p), Species=="Porties"), {
  par(mfrow=c(1,3))
  plot(D ~ PC1)
  plot(D ~ PC2)
  plot(D ~ PC3)
})

cor(apply(data.frame(subset(sample_data(phy.f.p), Species=="Porties")[, c("D", "PC1", "PC2", "PC3")]), 2, as.numeric))
##              D          PC1         PC2          PC3
## D    1.0000000 -0.355903059  0.23110344 -0.100036628
## PC1 -0.3559031  1.000000000  0.01144529  0.005817143
## PC2  0.2311034  0.011445294  1.00000000 -0.070648038
## PC3 -0.1000366  0.005817143 -0.07064804  1.000000000
with(subset(sample_data(phy.f.p), Species=="Acropora"), {
  par(mfrow=c(1,3))
  plot(D ~ PC1)
  plot(D ~ PC2)
  plot(D ~ PC3)
})

cor(apply(data.frame(subset(sample_data(phy.f.p), Species=="Acropora")[, c("D", "PC1", "PC2", "PC3")]), 2, as.numeric))
##              D         PC1         PC2         PC3
## D    1.0000000  0.19661359 -0.44737508  0.16546966
## PC1  0.1966136  1.00000000 -0.02695757 -0.04138878
## PC2 -0.4473751 -0.02695757  1.00000000 -0.02984774
## PC3  0.1654697 -0.04138878 -0.02984774  1.00000000

Does beta diversity correspond to temperature regime?

devtools::source_url("https://raw.githubusercontent.com/jrcunning/STJ2012/master/R/functions.R")
## SHA-1 hash of file is ce544e1c040045182ad40e84b2464ad7cae1f73c
acropora <- subset_samples(phy.f.p, Species=="Acropora")
acrobd <- betad(acropora, group="Site")

porites <- subset_samples(phy.f.p, Species=="Porties")
porbd <- betad(porites, group="Site")

par(mfrow=c(3,1))
plot(pca.temp$x[,1], porbd$sambdsumm$mean, ylim=c(0,0.7), main="Beta diversity vs. Temp. PC1")
points(pca.temp$x[,1], acrobd$sambdsumm$mean, col="red")
plot(pca.temp$x[,2], porbd$sambdsumm$mean, ylim=c(0,0.7), main="Beta diversity vs. Temp. PC2")
points(pca.temp$x[,2], acrobd$sambdsumm$mean, col="red")
plot(pca.temp$x[,3], porbd$sambdsumm$mean, ylim=c(0,0.7), main="Beta diversity vs. Temp. PC3")
points(pca.temp$x[,3], acrobd$sambdsumm$mean, col="red")

Does phylogenetically informed beta diversity correspond to temperature regime?

betadiv <- function(phy, group) {
  # Calculate distances to species centroids for each sample in principal coordinate space
  samdat <- data.frame(sample_data(phy))
  UFdist <- UniFrac(phy, weighted=TRUE, normalized=F, parallel=FALSE, fast=TRUE)
  sambd <- betadisper(d=UFdist, group=samdat[, group], 
                      type="centroid", bias.adjust=FALSE)
  # Calculate each species' mean distance to centroid and standard error
  sambdsumm <- aggregate(data.frame(mean=as.vector(sambd$distances)), by=list(group=sambd$group), FUN=mean)
  sambdsumm$se <- aggregate(as.vector(sambd$distances), by=list(group=sambd$group), 
                            FUN=function(x) sd(x)/sqrt(length(x)))$x
  # Determine order of decreasing mean dispersion
  sambdsumm.ord <- sambdsumm[order(sambdsumm$mean, decreasing=T), ]
  # Update betadisper object with species in decreasing order of mean dispersion
  sambd <- betadisper(d=UFdist, 
                      group=factor(samdat[, group], levels=as.character(sambdsumm.ord$group)), 
                      type="centroid", bias.adjust=FALSE)
  # Use permutations to perform pairwise comparisons of group mean dispersions
  sampt <- permutest(sambd, pairwise=T)
  saml <- multcompLetters(sampt$pairwise$permuted)
  # Return results as a list
  return(list(sambdsumm.ord=sambdsumm.ord, sambdsumm=sambdsumm, saml=saml))
}

acropora.bd <- subset_samples(phy.f.p, Species=="Acropora")
acrobdiv <- betadiv(acropora.bd, group="Site")

porites.bd <- subset_samples(phy.f.p, Species=="Porties")
porbdiv <- betadiv(porites.bd, group="Site")

par(mfrow=c(3,1))
plot(pca.temp$x[,1], porbdiv$sambdsumm$mean, ylim=c(0,0.15), main="Phylogenetically Informed Beta diversity vs. Temp. PC1")
points(pca.temp$x[,1], acrobdiv$sambdsumm$mean, col="red")
plot(pca.temp$x[,2], porbdiv$sambdsumm$mean, ylim=c(0,0.15), main="Phylogenetically Informed Beta diversity vs. Temp. PC2")
points(pca.temp$x[,2], acrobdiv$sambdsumm$mean, col="red")
plot(pca.temp$x[,3], porbdiv$sambdsumm$mean, ylim=c(0,0.15), main="Phylogenetically Informed Beta diversity vs. Temp. PC3")
points(pca.temp$x[,3], acrobdiv$sambdsumm$mean, col="red")

par(mfrow=c(3,1))
plot(temp.summ$daysover30, porbdiv$sambdsumm$mean, ylim=c(0,0.15), main="Phylogenetically Informed Beta diversity vs. Days over 30")
points(temp.summ$daysover30, acrobdiv$sambdsumm$mean, col="red")

plot(temp.summ$daily.mean.range, porbdiv$sambdsumm$mean, ylim=c(0,0.15), main="Phylogenetically Informed Beta diversity vs. Day Mean Range")
points(temp.summ$daily.mean.range, acrobdiv$sambdsumm$mean, col="red")

plot(temp.summ$var, porbdiv$sambdsumm$mean, ylim=c(0,0.15), main="Phylogenetically Informed Beta diversity vs. Variance")
points(temp.summ$var, acrobdiv$sambdsumm$mean, col="red")

Constrained ordination - do temperature metrics drive community structure?

### Now try constrained ordination 
phy.f.porites <- subset_samples(phy.f,Species=="Porties")
phy.f.acropora <- subset_samples(phy.f,Species=="Acropora")

## Constrained ordination
# Ordinate using first for principal components of the temperature metrics
ord.CAP <- ordinate(phy.f,method="CAP",distance="wunifrac",formula= ~ PCA1 + PCA2 + PCA3 + PCA4)
plot_ordination(phy.f, ord.CAP, color="Species", shape="Site",type="scree",
                title="97% within-sample OTUs - CAP")

# Ordinate using all temperature metrics included in PCA above (just to see if it's different)
ord.CAP2 <- ordinate(phy.f,method="CAP",distance="wunifrac",formula= ~ mean + var + skew + dip + daily.mean.min + daily.mean.max + daily.mean.range + daysover30 + daysunder27.5)
plot_ordination(phy.f, ord.CAP2, color="Species", shape="Site",type="scree",
                title="97% within-sample OTUs - CAP")

### For both of these constrained analyses, it looks like temperature metrics account for approximately 11% of the variability, but there is a large amount of variability that does not fit into the constrained axes.
# This may be due to species differences, so next we try each species individually
ord.porites.CAP <- ordinate(phy.f.porites,method="CAP",distance="wunifrac",formula= ~ PCA1 + PCA2 + PCA3 + PCA4)
plot_ordination(phy.f.porites, ord.porites.CAP, color="Species", shape="Site",type="scree",
                title="97% within-sample OTUs - CAP Porites only")

plot_ordination(phy.f.porites, ord.porites.CAP, color="nclades", shape="Site",
                title="97% within-sample OTUs - CAP Porites only")

# Now temperature accounts for about 30% of the variability in Porites-associated Symbiodinium communities
ord.acropora.CAP <- ordinate(phy.f.acropora,method="CAP",distance="wunifrac",formula= ~ PCA1 + PCA2 + PCA3 + PCA4)
plot_ordination(phy.f.acropora, ord.acropora.CAP, color="Species", shape="Site",type="scree",
                title="97% within-sample OTUs - CAP Acropora only")

plot_ordination(phy.f.acropora, ord.acropora.CAP, color="nclades", shape="Site",
                title="97% within-sample OTUs - CAP Acropora only")

# Now temperature accounts for almost 40% of the variability in Acropora-associated Symbidoinium communities